32  Decoradores en Pandas

import functools

import pandas as pd
import plotly.express as px
# Relative path of the parquet file that is passed to lee_datos below.
f = '../../data/Temixco_2018_10Min.parquet'


def resumen_estadistico(funcion):
    """Decorate a function so the ``describe()`` of its result is printed.

    The wrapper forwards every positional and keyword argument to
    ``funcion``, prints ``describe()`` of the returned object and then
    returns that object unchanged.

    Args:
        funcion: Callable whose return value exposes ``describe()``
            (a pandas DataFrame in this notebook).

    Returns:
        The wrapping callable. ``functools.wraps`` keeps the name and
        docstring of ``funcion`` visible on it.
    """
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        # Side effect only: the summary is printed, not returned.
        print(df.describe())
        return df
    return wrapper


@resumen_estadistico
def lee_datos(f):
    """Read the parquet file at ``f`` and return it as a DataFrame.

    Because of the ``resumen_estadistico`` decorator, calling this
    function also prints the statistical summary of the loaded data.
    """
    return pd.read_parquet(f)

# Load the data; the decorator prints the describe() table as a side effect.
tmx = lee_datos(f)
# Bare expression: the notebook renders the DataFrame as the cell output.
tmx
                 Ib            Ig            To            RH           WS  \
count  52423.000000  52423.000000  52560.000000  52560.000000  52560.00000   
mean     236.742726    257.414344     22.838098     45.152827      1.90520   
std      327.983721    345.976954      4.443339     19.426263      1.04411   
min        0.000000      0.000000      8.160000      5.648000      0.05000   
25%        0.001000      0.000000     19.350000     29.770000      1.18000   
50%        0.260000      3.293000     22.670000     42.600000      1.78500   
75%      542.300000    533.900000     26.030000     59.280000      2.46000   
max     1021.000000   1348.000000     35.870000     97.700000     14.86000   

                 WD             P  
count  52560.000000  52560.000000  
mean     210.734453  87591.151598  
std      109.276328    245.715965  
min        0.000000  86772.650000  
25%      134.675000  87429.500000  
50%      211.900000  87595.090000  
75%      319.800000  87760.647500  
max      360.000000  88516.950000  
Ib Ig To RH WS WD P
time
2018-01-01 00:00:00 NaN NaN 18.70 36.34 1.422 316.0 87864.11
2018-01-01 00:10:00 0.002 0.0 18.95 35.29 1.008 283.7 87876.37
2018-01-01 00:20:00 0.170 0.0 18.94 35.43 1.565 326.0 87888.64
2018-01-01 00:30:00 0.371 0.0 18.77 35.89 2.175 354.5 87887.21
2018-01-01 00:40:00 0.305 0.0 18.81 36.34 1.902 348.0 87886.91
... ... ... ... ... ... ... ...
2018-12-31 23:10:00 0.125 0.0 18.51 47.29 1.715 332.2 87484.32
2018-12-31 23:20:00 0.000 0.0 18.26 48.02 1.703 320.5 87470.70
2018-12-31 23:30:00 0.044 0.0 18.39 46.84 2.887 335.7 87455.03
2018-12-31 23:40:00 0.170 0.0 17.99 47.85 1.528 358.8 87470.02
2018-12-31 23:50:00 0.003 0.0 17.75 49.65 0.598 322.3 87467.29

52560 rows × 7 columns


def agregar_diff_To(funcion):
    """Decorate a function so its result gains a ``To_diff`` column.

    The wrapper calls ``funcion`` with the arguments it receives and
    stores ``df['To'].diff()`` (difference between consecutive rows of
    the ``To`` column) in a new ``To_diff`` column of the returned object.

    Args:
        funcion: Callable returning a DataFrame that has a ``To`` column.

    Returns:
        The wrapping callable. ``functools.wraps`` keeps the name and
        docstring of ``funcion`` visible on it.
    """
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        # The column is added in place on the object returned by funcion.
        df['To_diff'] = df['To'].diff()
        return df
    return wrapper

def resumen_estadistico(funcion):
    """Decorate a function so the ``describe()`` of its result is printed.

    The wrapper forwards every positional and keyword argument to
    ``funcion``, prints ``describe()`` of the returned object and then
    returns that object unchanged.

    Args:
        funcion: Callable whose return value exposes ``describe()``
            (a pandas DataFrame in this notebook).

    Returns:
        The wrapping callable. ``functools.wraps`` keeps the name and
        docstring of ``funcion`` visible on it.
    """
    @functools.wraps(funcion)
    def wrapper(*args, **kwargs):
        df = funcion(*args, **kwargs)
        # Side effect only: the summary is printed, not returned.
        print(df.describe())
        return df
    return wrapper


# La razón por la que resumen_estadistico debe ir arriba de agregar_diff_To
# es el orden de los decoradores: se aplican de abajo hacia arriba (el más cercano
# a la función la envuelve primero), así que resumen_estadistico queda como la capa
# externa y su describe() se ejecuta cuando el DataFrame ya incluye la columna To_diff.


@resumen_estadistico
@agregar_diff_To
def lee_datos(f):
    """Read the parquet file at ``f`` and return it as a DataFrame.

    ``agregar_diff_To`` sits closest to the function, so it wraps it
    first; ``resumen_estadistico`` wraps that result and therefore prints
    a summary that already contains the ``To_diff`` column.
    """
    return pd.read_parquet(f)


# Same path as the earlier assignment to f.
f = '../../data/Temixco_2018_10Min.parquet'
# Load the data; the stacked decorators add To_diff and print describe().
tmx = lee_datos(f)
# Bare expression: the notebook renders the DataFrame as the cell output.
tmx
                 Ib            Ig            To            RH           WS  \
count  52423.000000  52423.000000  52560.000000  52560.000000  52560.00000   
mean     236.742726    257.414344     22.838098     45.152827      1.90520   
std      327.983721    345.976954      4.443339     19.426263      1.04411   
min        0.000000      0.000000      8.160000      5.648000      0.05000   
25%        0.001000      0.000000     19.350000     29.770000      1.18000   
50%        0.260000      3.293000     22.670000     42.600000      1.78500   
75%      542.300000    533.900000     26.030000     59.280000      2.46000   
max     1021.000000   1348.000000     35.870000     97.700000     14.86000   

                 WD             P       To_diff  
count  52560.000000  52560.000000  52559.000000  
mean     210.734453  87591.151598     -0.000018  
std      109.276328    245.715965      0.438517  
min        0.000000  86772.650000     -8.220000  
25%      134.675000  87429.500000     -0.230000  
50%      211.900000  87595.090000     -0.020000  
75%      319.800000  87760.647500      0.220000  
max      360.000000  88516.950000      4.280000  
Ib Ig To RH WS WD P To_diff
time
2018-01-01 00:00:00 NaN NaN 18.70 36.34 1.422 316.0 87864.11 NaN
2018-01-01 00:10:00 0.002 0.0 18.95 35.29 1.008 283.7 87876.37 0.25
2018-01-01 00:20:00 0.170 0.0 18.94 35.43 1.565 326.0 87888.64 -0.01
2018-01-01 00:30:00 0.371 0.0 18.77 35.89 2.175 354.5 87887.21 -0.17
2018-01-01 00:40:00 0.305 0.0 18.81 36.34 1.902 348.0 87886.91 0.04
... ... ... ... ... ... ... ... ...
2018-12-31 23:10:00 0.125 0.0 18.51 47.29 1.715 332.2 87484.32 -0.10
2018-12-31 23:20:00 0.000 0.0 18.26 48.02 1.703 320.5 87470.70 -0.25
2018-12-31 23:30:00 0.044 0.0 18.39 46.84 2.887 335.7 87455.03 0.13
2018-12-31 23:40:00 0.170 0.0 17.99 47.85 1.528 358.8 87470.02 -0.40
2018-12-31 23:50:00 0.003 0.0 17.75 49.65 0.598 322.3 87467.29 -0.24

52560 rows × 8 columns

# Display the DataFrame again; it now carries the To_diff column.
tmx
Ib Ig To RH WS WD P To_diff
time
2018-01-01 00:00:00 NaN NaN 18.70 36.34 1.422 316.0 87864.11 NaN
2018-01-01 00:10:00 0.002 0.0 18.95 35.29 1.008 283.7 87876.37 0.25
2018-01-01 00:20:00 0.170 0.0 18.94 35.43 1.565 326.0 87888.64 -0.01
2018-01-01 00:30:00 0.371 0.0 18.77 35.89 2.175 354.5 87887.21 -0.17
2018-01-01 00:40:00 0.305 0.0 18.81 36.34 1.902 348.0 87886.91 0.04
... ... ... ... ... ... ... ... ...
2018-12-31 23:10:00 0.125 0.0 18.51 47.29 1.715 332.2 87484.32 -0.10
2018-12-31 23:20:00 0.000 0.0 18.26 48.02 1.703 320.5 87470.70 -0.25
2018-12-31 23:30:00 0.044 0.0 18.39 46.84 2.887 335.7 87455.03 0.13
2018-12-31 23:40:00 0.170 0.0 17.99 47.85 1.528 358.8 87470.02 -0.40
2018-12-31 23:50:00 0.003 0.0 17.75 49.65 0.598 322.3 87467.29 -0.24

52560 rows × 8 columns

def grafica_Ig_Ib(tmx):
    """Build a line chart of the ``Ib`` and ``Ig`` columns against the index.

    Args:
        tmx: DataFrame containing the ``Ib`` and ``Ig`` columns.

    Returns:
        The figure object produced by ``px.line``.
    """
    columnas = ['Ib', 'Ig']
    return px.line(tmx, x=tmx.index, y=columnas)


# Bare call: the returned figure is what the notebook shows as cell output.
grafica_Ig_Ib(tmx)

32.1 Actividades sugeridas:

  1. Haz un decorador que valide si existen las columnas Ig e Ib y si no existe alguna de las dos, imprima las columnas existentes en el df.

Si ya lo hiciste pero quieres ver una solución, expande la celda siguiente:

def validar_columnas_ig_ib(func):
    """Decorate a function so it only runs when ``Ig`` and ``Ib`` exist.

    The wrapper inspects the first positional argument, which must expose
    ``columns``. If either ``Ig`` or ``Ib`` is absent it prints the
    missing names and the columns that do exist, and returns ``None``
    without calling ``func``. Otherwise it calls ``func`` with the same
    arguments and returns its result.

    Args:
        func: Callable whose first parameter is the DataFrame to validate.

    Returns:
        The wrapping callable. ``functools.wraps`` keeps the name and
        docstring of ``func`` visible on it.
    """
    @functools.wraps(func)
    def wrapper(df, *args, **kwargs):
        required_columns = ['Ig', 'Ib']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"Columnas faltantes: {missing_columns}")
            print("Columnas existentes en el DataFrame:", df.columns.tolist())
            # Validation failed: func is skipped and the caller receives None.
            return None
        return func(df, *args, **kwargs)
    return wrapper

@validar_columnas_ig_ib
def grafica_Ig_Ib(tmx):
    """Build a line chart of the ``Ib`` and ``Ig`` columns against the index.

    The ``validar_columnas_ig_ib`` decorator checks that both columns
    exist before this body runs.

    Args:
        tmx: DataFrame containing the ``Ib`` and ``Ig`` columns.

    Returns:
        The figure object produced by ``px.line``.
    """
    columnas = ['Ib', 'Ig']
    return px.line(tmx, x=tmx.index, y=columnas)


# Bare call: the decorated function validates the columns before plotting.
grafica_Ig_Ib(tmx)

33 Actividades sugeridas:

  1. Haz el clásico decorador que mide el tiempo del proceso.
  2. Haz un decorador que diga qué función estás ejecutando y qué argumentos está recibiendo.
  3. Revisa el paquete https://github.com/groodt/retrying para ver cómo usar decoradores ya definidos.